I: Forecasting with moving average models
1: The simple moving average
library(TSstudio)
## Warning: package 'TSstudio' was built under R version 4.3.3
data("Coffee_Prices")
ts_info(Coffee_Prices)
## The Coffee_Prices series is a mts object with 2 variables and 701 observations
## Frequency: 12
## Start time: 1960 1
## End time: 2018 5
robusta <- Coffee_Prices[,1]
library(plotly)
## Warning: package 'plotly' was built under R version 4.3.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
ts_plot(robusta,
title = "The Robusta Coffee Monthly Prices",
Ytitle = "Price in USD",
Xtitle = "Year")
2
library(tidyr)
sma_forecast <- function(df, h, m, w = NULL){
# Error handling
if(h > nrow(df)){
stop("The length of the forecast horizon must be shorter than the length of the series")}
if(m > nrow(df)){
stop("The length of the rolling window must be shorter than the length of the series")}
if(!is.null(w)){
if(length(w) != m){
stop("The weight argument is not aligned with the length of the rolling window")
} else if(sum(w) !=1){
stop("The sum of the average weight is different than 1")
}
}
# Setting the average weigths
if(is.null(w)){
w <- rep(1/m, m)
}
# Setting the data frame
#-----------------------
# Changing the Date object column name
names(df)[1] <- "date"
# Setting the training and testing partition
# according to the forecast horizon
df$type <- c(rep("train", nrow(df) - h),
rep("test", h))
# Spreading the table by the partition type
df1 <- df %>% spread(key = type, value = y)
# Create the target variable
df1$yhat <- df1$train
# Simple moving average function
for(i in (nrow(df1) - h + 1):nrow(df1)){
r <- (i-m):(i-1)
df1$yhat[i] <- sum(df1$yhat[r] * w)
}
# dropping from the yhat variable the actual values
# that were used for the rolling window
df1$yhat <- ifelse(is.na(df1$test), NA, df1$yhat)
df1$y <- ifelse(is.na(df1$test), df1$train, df1$test)
return(df1)
}
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.3.3
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
robusta_df <- ts_to_prophet(robusta)
robusta_fc_m1 <- sma_forecast(robusta_df, h = 24, m = 1)
robusta_fc_m6 <- sma_forecast(robusta_df, h = 24, m = 6)
robusta_fc_m12 <- sma_forecast(robusta_df, h = 24, m = 12)
robusta_fc_m24 <- sma_forecast(robusta_df, h = 24, m = 24)
robusta_fc_m36 <- sma_forecast(robusta_df, h = 24, m = 36)
library(plotly)
plot_ly(data = robusta_df[650:nrow(robusta_df),], x = ~ ds, y = ~ y,
type = "scatter", mode = "lines",
name = "Actual") %>%
add_lines(x = robusta_fc_m1$date, y = robusta_fc_m1$yhat,
name = "SMA - 1", line = list(dash = "dash")) %>%
add_lines(x = robusta_fc_m6$date, y = robusta_fc_m6$yhat,
name = "SMA - 6", line = list(dash = "dash")) %>%
add_lines(x = robusta_fc_m12$date, y = robusta_fc_m12$yhat,
name = "SMA - 12", line = list(dash = "dash")) %>%
add_lines(x = robusta_fc_m24$date, y = robusta_fc_m24$yhat,
name = "SMA - 24", line = list(dash = "dash")) %>%
add_lines(x = robusta_fc_m36$date, y = robusta_fc_m36$yhat,
name = "SMA - 36", line = list(dash = "dash")) %>%
layout(title = "Forecasting the Robusta Coffee Monthly Prices",
xaxis = list(title = ""),
yaxis = list(title = "USD per Kg."))
3: Weighted moving average
data(USgas)
USgas_df <- ts_to_prophet(USgas)
USgas_fc_m12a <- sma_forecast(USgas_df,
h = 24,
m = 12,
w = c(1, rep(0,11)))
USgas_fc_m12b <- sma_forecast(USgas_df,
h = 24,
m = 12,
w = c(0.8, rep(0,10), 0.2))
plot_ly(data = USgas_df[190:nrow(USgas_df),], x = ~ ds, y = ~ y,
type = "scatter", mode = "lines",
name = "Actual") %>%
add_lines(x = USgas_fc_m12a$date, y = USgas_fc_m12a$yhat,
name = "WMA - Seasonal Lag", line = list(dash = "dash")) %>%
add_lines(x = USgas_fc_m12b$date, y = USgas_fc_m12b$yhat,
name = "WMA - 12 (0.2/0.8)", line = list(dash = "dash")) %>%
layout(title = "Forecasting the Monthly Consumption of Natural Gas in the US",
xaxis = list(title = ""),
yaxis = list(title = "Billion Cubic Feet"))
II: Forecasting with exponential smoothing
1: Simple exponentian smoothing model
alpha_df <- data.frame(index = seq(from = 1, to = 15, by = 1),
power = seq(from = 14, to = 0, by = -1))
alpha_df$alpha_0.01 <- 0.01 * (1 - 0.01) ^ alpha_df$power
alpha_df$alpha_0.2 <- 0.2 * (1 - 0.2) ^ alpha_df$power
alpha_df$alpha_0.4 <- 0.4 * (1 - 0.4) ^ alpha_df$power
alpha_df$alpha_0.6 <- 0.6 * (1 - 0.6) ^ alpha_df$power
alpha_df$alpha_0.8 <- 0.8 * (1 - 0.8) ^ alpha_df$power
alpha_df$alpha_1 <- 1 * (1 - 1) ^ alpha_df$power
plot_ly(data = alpha_df) %>%
add_lines(x = ~ index, y = ~ alpha_0.01, name = "alpha = 0.01") %>%
add_lines(x = ~ index, y = ~ alpha_0.2, name = "alpha = 0.2") %>%
add_lines(x = ~ index, y = ~ alpha_0.4, name = "alpha = 0.4") %>%
add_lines(x = ~ index, y = ~ alpha_0.6, name = "alpha = 0.6") %>%
add_lines(x = ~ index, y = ~ alpha_0.8, name = "alpha = 0.8") %>%
add_lines(x = ~ index, y = ~ alpha_1, name = "alpha = 1") %>%
layout(title = "Decay Rate of the SES Weights",
xaxis = list(title = "Index"),
yaxis = list(title = "Weight"))
2: Forecasting with the ses function
robusta_par <- ts_split(robusta, sample.out = 12)
train <- robusta_par$train
test <- robusta_par$test
library(forecast)
## Warning: package 'forecast' was built under R version 4.3.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
fc_ses <- ses(train, h = 12, initial = "optimal")
fc_ses$model
## Simple exponential smoothing
##
## Call:
## ses(y = train, h = 12, initial = "optimal")
##
## Smoothing parameters:
## alpha = 0.9999
##
## Initial states:
## l = 0.6957
##
## sigma: 0.161
##
## AIC AICc BIC
## 1989.646 1989.681 2003.252
test_forecast(actual = robusta,
forecast.obj = fc_ses,
test = test) %>%
layout(title = "Robusta Coffee Prices Forecast vs. Actual",
xaxis = list(range = c(2010, max(time(robusta)))),
yaxis = list(range = c(1, 3)))
3
plot_forecast(fc_ses) %>%
add_lines(x = time(test) + deltat(test),
y = as.numeric(test),
name = "Testing Partition") %>%
layout(title = "Robusta Coffee Prices Forecast vs. Actual",
xaxis = list(range = c(2010, max(time(robusta)) +
deltat(robusta))),
yaxis = list(range = c(0, 4)))
4: Model optimization with grid search
robusta_par1 <- ts_split(robusta, sample.out = 24)
train1 <- robusta_par1$train
test1 <- ts_split(robusta_par1$test, sample.out = 12)$train
robusta_par2 <- ts_split(robusta, sample.out = 12)
train2 <- robusta_par2$train
valid <- robusta_par2$test
alpha <- seq(from = 0, to = 1, by = 0.01)
alpha[1] <- 0.001
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
ses_grid <- lapply(alpha, function(i){
md1 <- md_accuracy1 <- md2 <- md_accuracy2 <- results <- NULL
md1 <- ses(train1, h = 12, alpha = i, initial = "simple")
md_accuracy1 <- accuracy(md1, test1)
md2 <- ses(train2, h = 12, alpha = i, initial = "simple")
md_accuracy2 <- accuracy(md2, valid)
resutls <- data.frame(alpha = i,
train = md_accuracy1[9],
test = md_accuracy1[10],
valid = md_accuracy2[10])
}) %>% bind_rows()
plot_ly(data = ses_grid, x = ~ alpha, y = ~ train,
line = list(color = 'rgb(205, 12, 24)'),
type = "scatter",
mode = "lines",
name = "Training") %>%
add_lines(x = ~ alpha, y = ~ test, line = list(color = "rgb(22, 96, 167)", dash = "dash"), name= "Testing") %>%
add_lines(x = ~ alpha, y = ~ valid, line = list(color = "green", dash = "dot"), name = "Validation") %>%
layout(title = "SES Model Grid Search Results",
yaxis = list(title = "MAPE (%)"))
5: Forecasting with the holt function
library(Quandl)
## Warning: package 'Quandl' was built under R version 4.3.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 4.3.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.3.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
gdp <- Quandl("FRED/GDP", start_date = "2010-01-01", type = "ts")
ts_info(gdp)
## The gdp series is a ts object with 1 variable and 48 observations
## Frequency: 4
## Start time: 2010 1
## End time: 2021 4
ts_plot(gdp,
title = "Gross Domestic Product",
Ytitle = "Billions of Dollars",
Xtitle = "Source: U.S. Bureau of Economic Analysis / fred.stlouisfed.org")
6
gdp_par <- ts_split(gdp, sample.out = 8)
train <- gdp_par$train
test <- gdp_par$test
fc_holt <- holt(train, h = 8, initial = "optimal")
fc_holt$model
## Holt's method
##
## Call:
## holt(y = train, h = 8, initial = "optimal")
##
## Smoothing parameters:
## alpha = 0.9985
## beta = 0.0941
##
## Initial states:
## l = 14603.0487
## b = 161.9411
##
## sigma: 80.9869
##
## AIC AICc BIC
## 504.8838 506.6485 513.3282
accuracy(fc_holt, test)
## ME RMSE MAE MPE MAPE MASE
## Training set 10.99164 76.83093 61.7626 0.05241543 0.3460477 0.08790283
## Test set -665.87355 1141.19097 854.8069 -3.29699460 4.0874667 1.21659297
## ACF1 Theil's U
## Training set 0.0192117 NA
## Test set 0.2975704 1.08055
test_forecast(gdp, forecast.obj = fc_holt, test = test)
7
fc_holt_exp <- holt(train,
h = 8,
beta = 0.75 ,
initial = "optimal",
exponential = TRUE)
fc_holt_exp$model
## Holt's method with exponential trend
##
## Call:
## holt(y = train, h = 8, initial = "optimal", exponential = TRUE,
##
## Call:
## beta = 0.75)
##
## Smoothing parameters:
## alpha = 0.781
## beta = 0.75
##
## Initial states:
## l = 14572.2291
## b = 1.0142
##
## sigma: 0.0053
##
## AIC AICc BIC
## 515.9291 517.0720 522.6847
accuracy(fc_holt_exp, test)
## ME RMSE MAE MPE MAPE MASE
## Training set -2.792636 90.20753 74.14323 -0.01756593 0.4165413 0.1055234
## Test set -703.014806 1151.21262 858.97402 -3.46212317 4.1130726 1.2225238
## ACF1 Theil's U
## Training set -0.1541725 NA
## Test set 0.2834645 1.090637
test_forecast(gdp, forecast.obj = fc_holt, test = test)
III: Holt-Winters model
1
data(USgas)
decompose(USgas) %>% plot()

2
USgas_par <- ts_split(USgas, 12)
train <- USgas_par$train
test <- USgas_par$test
md_hw <- HoltWinters(train)
md_hw
## Holt-Winters exponential smoothing with trend and additive seasonal component.
##
## Call:
## HoltWinters(x = train)
##
## Smoothing parameters:
## alpha: 0.371213
## beta : 0
## gamma: 0.4422456
##
## Coefficients:
## [,1]
## a 2491.9930104
## b -0.1287005
## s1 32.0972651
## s2 597.1088003
## s3 834.9628439
## s4 353.8593860
## s5 318.1927058
## s6 -173.0721496
## s7 -346.6229990
## s8 -329.7169608
## s9 -112.1664217
## s10 -140.3186476
## s11 -324.5343787
## s12 -243.9334551
fc_hw <- forecast(md_hw, h = 12)
accuracy(fc_hw, test)
## ME RMSE MAE MPE MAPE MASE
## Training set 7.828361 115.2062 87.67420 0.2991952 4.248131 0.7614222
## Test set 51.013877 115.1555 98.06531 1.7994297 3.766099 0.8516656
## ACF1 Theil's U
## Training set 0.21911103 NA
## Test set -0.01991923 0.3652142
test_forecast(actual = USgas,
forecast.obj = fc_hw,
test = test)
3
# shallow_grid <- ts_grid(train,
# model = "HoltWinters",
# periods = 6,
# window_space = 6,
# window_test = 12,
# hyper_params = list(alpha = seq(0,1,0.1),
# beta = seq(0,1,0.1),
# gamma = seq(0,1,0.1)),
# parallel = FALSE,
# n.cores = 8)
# shallow_grid$grid_df[1:10,]
#
# plot_grid(shallow_grid)
4
# deep_grid <- ts_grid(train,
# model = "HoltWinters",
# periods = 6,
# window_space = 6,
# window_test = 12,
# hyper_params = list(alpha = seq(0.1,0.5,0.01),
# beta = seq(0,0.1,0.01),
# gamma = seq(0.2,0.4,0.01)),
# parallel = FALSE,
# n.cores = 8)
# plot_grid(deep_grid)
5
#plot_grid(deep_grid, type = "3D", top = 250)
6
# md_hw_grid <- HoltWinters(train, #
# alpha = deep_grid$alpha,
# beta = deep_grid$beta,
# gamma = deep_grid$gamma)
#
# fc_hw_grid <- forecast(md_hw_grid, h = 12)
#
# accuracy(fc_hw_grid, test)
#
# test_forecast(actual = USgas,
# forecast.obj = fc_hw_grid,
# test = test)